{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "947c906f-136a-4f3c-9345-c57f0ed34124",
   "metadata": {},
   "source": [
    "# 划分训练集和测试集\n",
    "\n",
    "kjh 2024/8/1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11c7d96b-19a4-4236-bf1f-2977f2dcba99",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import random\n",
    "from tqdm import tqdm"
   ]
  },
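  {
   "cell_type": "markdown",
   "id": "bbc31fb8",
   "metadata": {},
   "source": [
    "需要自己准备的格式：\n",
    "\n",
    "```\n",
    "-729data    将脚本放在datasets下\n",
    "--images\n",
    "--labelme\n",
    "```\n",
    "\n",
    "注意：下方代码实际使用的数据集根目录由 `Dataset_root` 指定，图像和标注文件夹分别为 `images2obb` 和 `labels2obb`（标注为与图像同名的 `.txt` 文件），请按实际目录名修改。"
   ]
  },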
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23aeda88-1d70-40ca-b895-6f57a84dda58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the dataset root folder, resolved relative to the current working directory.\n",
    "# Rename it to match your own dataset.\n",
    "Dataset_root = 'tgwV4.0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44330058-96ad-4898-8dd0-e358a5c30cf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the image folder inside the dataset root the working directory;\n",
    "# the following cells use paths relative to it.\n",
    "os.chdir(\n",
    "    os.path.join(Dataset_root, 'images2obb')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca2838dc-aca4-4822-ad1d-bb2fd771223a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the entries of the current working directory.\n",
    "os.listdir('.')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17c68c29-c117-47ad-b407-e0a53d3c5f52",
   "metadata": {},
   "source": [
    "## 划分训练集、测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "split-train-val-files",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_frac = 0.2   # fraction of the files assigned to the test set\n",
    "random.seed(666)  # fixed seed so the shuffle below is repeatable\n",
    "\n",
    "folder = '.'  # work in the current directory\n",
    "\n",
    "# Keep only regular, non-hidden files: sub-directories (e.g. a train/ or val/ folder\n",
    "# left by an earlier run) and entries such as .DS_Store must not be counted or moved\n",
    "# as images. Sort before shuffling because os.listdir() returns names in arbitrary\n",
    "# order, which would otherwise make the seeded split differ between machines.\n",
    "img_paths = sorted(\n",
    "    name for name in os.listdir(folder)\n",
    "    if not name.startswith('.') and os.path.isfile(os.path.join(folder, name))\n",
    ")\n",
    "random.shuffle(img_paths)  # shuffle the file names in place\n",
    "\n",
    "val_number = int(len(img_paths) * test_frac)  # number of test-set files (rounded down)\n",
    "train_files = img_paths[val_number:]          # file names of the training set\n",
    "val_files = img_paths[:val_number]            # file names of the test set\n",
    "\n",
    "print('数据集文件总数', len(img_paths))\n",
    "print('训练集文件个数', len(train_files))\n",
    "print('测试集文件个数', len(val_files))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c275b31-12ae-4d62-a050-d7a9636b0439",
   "metadata": {},
   "source": [
    "## 将训练集图像移动到`images2obb/train`目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66bdba3f-4bfb-4865-a427-e330fefec387",
   "metadata": {},
   "outputs": [],
   "source": [
    "# exist_ok=True keeps the cell from aborting with FileExistsError when a\n",
    "# train/ folder is already present (e.g. created by an interrupted run).\n",
    "os.makedirs('train', exist_ok=True)\n",
    "\n",
    "# Move every training-set file from the current directory into train/.\n",
    "for each in tqdm(train_files):\n",
    "    shutil.move(each, 'train')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18828a1d-01ab-4816-937b-292d3578e8f7",
   "metadata": {},
   "source": [
    "## 将测试集图像移动到`images2obb/val`目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "962e949b-f578-4be7-9910-7177c308e8fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# exist_ok=True keeps the cell from aborting with FileExistsError when a\n",
    "# val/ folder is already present (e.g. created by an interrupted run).\n",
    "os.makedirs('val', exist_ok=True)\n",
    "\n",
    "# Move every test-set file from the current directory into val/.\n",
    "for each in tqdm(val_files):\n",
    "    shutil.move(each, 'val')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6301413f-f5e2-4a29-b189-26a3dca72193",
   "metadata": {},
   "source": [
    "## 将训练集标注移动到`labels2obb/train`目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ba779c6-5d24-4569-a2c5-93ffebbaed43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step over to the sibling folder labels2obb (one level up, then down again),\n",
    "# so the label files can be addressed by bare file name.\n",
    "os.chdir(os.path.join(os.pardir, 'labels2obb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc4afdf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the entries of the current working directory.\n",
    "os.listdir('.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e96ff435-1705-4c4e-b63b-9f1488618e5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# exist_ok=True tolerates a train/ folder that already exists.\n",
    "os.makedirs('train', exist_ok=True)\n",
    "\n",
    "# Move the .txt label that belongs to each training image into train/.\n",
    "for each in tqdm(train_files):\n",
    "    # splitext strips only the final extension, so an image name containing extra\n",
    "    # dots (e.g. 'scene.01.jpg') maps to 'scene.01.txt' instead of 'scene.txt'.\n",
    "    srt_path = os.path.splitext(each)[0] + '.txt'\n",
    "    shutil.move(srt_path, 'train')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5311ee76-c792-4b14-8e0f-6d7782ec1aa5",
   "metadata": {},
   "source": [
    "## 将测试集标注移动到`labels2obb/val`目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df9f8b03-4f60-42b0-be3c-8e3b5605a7e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# exist_ok=True tolerates a val/ folder that already exists.\n",
    "os.makedirs('val', exist_ok=True)\n",
    "\n",
    "# Move the .txt label that belongs to each test image into val/.\n",
    "for each in tqdm(val_files):\n",
    "    # splitext strips only the final extension, so an image name containing extra\n",
    "    # dots (e.g. 'scene.01.jpg') maps to 'scene.01.txt' instead of 'scene.txt'.\n",
    "    srt_path = os.path.splitext(each)[0] + '.txt'\n",
    "    shutil.move(srt_path, 'val')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65a18861-f94e-4d96-a432-6ddbbea12fb6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go up two directory levels from the current working directory.\n",
    "os.chdir(os.path.join(os.pardir, os.pardir))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b04360b9-c43b-4a01-a093-8908be5ea0a8",
   "metadata": {},
   "source": [
    "## 删除系统自动生成的多余文件"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f049f6ef-d54d-4d1d-83e7-a8d23bcb85a0",
   "metadata": {},
   "source": [
    "### 查看待删除的多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23c17bcc-bcbc-4ed1-a829-1b63e99d2b80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print every entry named __MACOSX (case-insensitive) below the current directory.\n",
    "!find . -iname '__MACOSX' -print"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69da9d50-11d1-4071-b818-dbb065a6474e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print every entry named .DS_Store (case-insensitive) below the current directory.\n",
    "!find . -iname '.DS_Store' -print"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4d6ca60-2948-4105-b635-e0d390814067",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print every entry named .ipynb_checkpoints (case-insensitive) below the current directory.\n",
    "!find . -iname '.ipynb_checkpoints' -print"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fedea526-5aac-42a6-b058-61b4ae4afe81",
   "metadata": {},
   "source": [
    "### 删除多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d959813-5703-4e01-ba3e-f27b207d59f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete every __MACOSX entry below the current directory.\n",
    "# Matches are passed NUL-separated to rm, so paths containing spaces are removed intact\n",
    "# (a shell loop over backtick output would split them). -prune keeps find from\n",
    "# descending into a directory that is about to be deleted.\n",
    "!find . -iname '__MACOSX' -prune -print0 | xargs -0 rm -rf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2ea0bef-6954-44d5-8478-680c8d7eded1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete every .DS_Store entry below the current directory.\n",
    "# Matches are passed NUL-separated to rm, so paths containing spaces are removed intact\n",
    "# (a shell loop over backtick output would split them). -prune keeps find from\n",
    "# descending into a matching directory that is about to be deleted.\n",
    "!find . -iname '.DS_Store' -prune -print0 | xargs -0 rm -rf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a2b568c-2685-4a13-90f6-185f60066360",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete every .ipynb_checkpoints entry below the current directory.\n",
    "# Matches are passed NUL-separated to rm, so paths containing spaces are removed intact\n",
    "# (a shell loop over backtick output would split them). -prune keeps find from\n",
    "# descending into a directory that is about to be deleted.\n",
    "!find . -iname '.ipynb_checkpoints' -prune -print0 | xargs -0 rm -rf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c0f41291-369b-4dd2-8265-8b54a980f6ea",
   "metadata": {},
   "source": [
    "### 验证多余文件已删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dea6d10-e56f-4775-a055-933d09a9adde",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search again; empty output means no __MACOSX entry is left below the current directory.\n",
    "!find . -iname '__MACOSX' -print"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23d47236-e846-42bd-9856-a0dd07a23621",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search again; empty output means no .DS_Store entry is left below the current directory.\n",
    "!find . -iname '.DS_Store' -print"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a85571be-399b-4e6e-b00e-f3fea415a17e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search again; empty output means no .ipynb_checkpoints entry is left below the current directory.\n",
    "!find . -iname '.ipynb_checkpoints' -print"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c0f299b-1987-4ed4-8223-fe97898983c8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
